ENVIRONMENTAL SCATTERPLOTS
# Define data for MA
response = names(DF_2020[c(2:17)])
quant <- 'Loss'
response = set_names(response)
# Function to output scatter plots for each var
scatter_fun = function(x, y) {
ggplot(DF_2020, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
theme_bw()
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $ppt_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $ppt_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $ppt_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_June
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_July
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_Aug
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_Sept
## Warning: Removed 66 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Define data for MA
response = names(DF_2021[c(2:17)])
quant <- 'Loss'
response = set_names(response)
# Function to output scatter plots for each var
scatter_fun = function(x, y) {
ggplot(DF_2021, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
theme_bw()
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $ppt_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $ppt_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $ppt_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmin_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmean_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_June
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_July
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_Aug
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

##
## $tmax_Sept
## Warning: Removed 35 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Define data for MA
response = names(DF[c(2:17)])
quant <- 'Loss'
response = set_names(response)
# Function to output scatter plots for each var
scatter_fun = function(x, y) {
ggplot(DF, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
theme_bw()
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June

##
## $ppt_July

##
## $ppt_Aug

##
## $ppt_Sept

##
## $tmin_June

##
## $tmin_July

##
## $tmin_Aug

##
## $tmin_Sept

##
## $tmean_June

##
## $tmean_July

##
## $tmean_Aug

##
## $tmean_Sept

##
## $tmax_June

##
## $tmax_July

##
## $tmax_Aug

##
## $tmax_Sept

DF <- DF %>% select(!contains('Sept')) %>% filter(Loss > 0)
response = names(DF[c(2:13)])
quant <- 'Loss'
response = set_names(response)
# Function to output scatter plots for each var
scatter_fun = function(x, y) {
ggplot(DF, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
stat_density(aes(x=.data[[x]], y=(18*(..scaled..))), position="identity", geom="line", color = 'red', linewidth = 1.5) +
geom_smooth(method='loess', color = 'blue', se = FALSE) +
geom_smooth(method='lm', color = 'darkgrey', se = FALSE) +
stat_regline_equation()+
stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "*`,`~")), label.x.npc = "centre") +
theme_bw() +
ylim(0, 60)
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June
## Warning: The dot-dot notation (`..scaled..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(scaled)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $ppt_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $ppt_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmin_June
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmin_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmin_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmean_June
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmean_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

##
## $tmean_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 7 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

##
## $tmax_June
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmax_July
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

##
## $tmax_Aug
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 9 rows containing missing values or values outside the scale range
## (`geom_smooth()`).

response = names(DF[c(2:13)])
quant <- 'Loss'
response = set_names(response)
# Function to output scatter plots for each var
scatter_fun = function(x, y) {
ggplot(DF, aes(x = .data[[x]])) +
geom_histogram(aes(y = ..density..), fill = 'gray', color = 'black', binwidth = 0.25) +
geom_density(color = 'red', linewidth = 1.5)+
#geom_smooth(method='loess', color = 'blue', se = FALSE) +
#geom_smooth(method='lm', color = 'blue', se = FALSE) +
#stat_regline_equation()+
#stat_cor(aes(label = paste(..rr.label.., ..p.label.., sep = "*`,`~")),
#label.x.npc = "centre") +
theme_bw()
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## $ppt_June

##
## $ppt_July

##
## $ppt_Aug

##
## $tmin_June

##
## $tmin_July

##
## $tmin_Aug

##
## $tmean_June

##
## $tmean_July

##
## $tmean_Aug

##
## $tmax_June

##
## $tmax_July

##
## $tmax_Aug

G_ppt <- DF %>%
filter(Loss > 0) %>%
select(Loss, contains('ppt'))
response = names(G_ppt[c(2:4)])
quant <- 'Loss'
scatter_fun = function(x, y) {
# Fit the nonlinear model using nls
model <- nls(Loss ~ k*exp(-1/2*(G_ppt[[x]]-mu)^2/sigma^2),
data = G_ppt,
start = c(mu = 100, sigma = 20, k = 10))
# Extract coefficients
coefficients <- coef(model)
# Plot
ggplot(data = G_ppt, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
geom_smooth(method = 'nls',
color = 'blue',
formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
method.args = list(start = c(mu = 100, sigma = 20, k = 10)),
se = FALSE) +
theme_bw() +
theme(panel.border = element_blank()) +
annotate("text",
x = Inf, y = Inf, vjust = 2, hjust = 1,
#x = 0.9, y = 0.9, hjust = 1, vjust = 1, halign = 0.5, valign = 0.5,
label = paste("Amplitude =", round(coefficients[3], 2),
", Standard deviation =", round(coefficients[2], 2),
", Mean =", round(coefficients[1], 2)))
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## [[1]]

##
## [[2]]

##
## [[3]]

G_tmax <- DF %>%
filter(Loss > 0) %>%
select(Loss, contains('tmax'))
response = names(G_tmax[c(2:4)])
quant <- 'Loss'
scatter_fun = function(x, y) {
# Fit the nonlinear model using nls
model <- nls(Loss ~ k*exp(-1/2*(G_tmax[[x]]-mu)^2/sigma^2),
data = G_tmax,
start = c(mu = 27, sigma = 5, k = 10))
# Extract coefficients
coefficients <- coef(model)
# Plot
ggplot(data = G_tmax, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
geom_smooth(method = 'nls',
color = 'blue',
formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
method.args = list(start = c(mu = 27, sigma = 5, k = 10)),
se = FALSE) +
theme_bw() +
theme(panel.border = element_blank()) +
annotate("text",
x = Inf, y = Inf, vjust = 2, hjust = 1,
#x = 0.9, y = 0.9, hjust = 1, vjust = 1, halign = 0.5, valign = 0.5,
label = paste("Amplitude =", round(coefficients[3], 2),
", Standard deviation =", round(coefficients[2], 2),
", Mean =", round(coefficients[1], 2)))
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
## [[1]]

##
## [[2]]

##
## [[3]]

G_tmin <- DF %>%
filter(Loss > 0) %>%
select(Loss, contains('tmin'))
response = names(G_tmin[c(2:4)])
quant <- 'Loss'
scatter_fun = function(x, y) {
# Fit the nonlinear model using nls
model <- nls(Loss ~ k*exp(-1/2*(G_tmin[[x]]-mu)^2/sigma^2),
data = G_tmin,
start = c(mu = 16, sigma = 5, k = 10))
# Extract coefficients
coefficients <- coef(model)
# Plot
ggplot(data = G_tmin, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
geom_smooth(method = 'nls',
color = 'blue',
formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
method.args = list(start = c(mu = 16, sigma = 5, k = 10)),
se = FALSE) +
theme_bw() +
theme(panel.border = element_blank()) +
annotate("text",
x = Inf, y = Inf, vjust = 2, hjust = 1,
#x = 0.9, y = 0.9, hjust = 1, vjust = 1, halign = 0.5, valign = 0.5,
label = paste("Amplitude =", round(coefficients[3], 2),
", Standard deviation =", round(coefficients[2], 2),
", Mean =", round(coefficients[1], 2)))
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
G_tmean <- DF %>%
filter(Loss > 0) %>%
select(Loss, contains('tmean'))
response = names(test[c(2:4)])
quant <- 'Loss'
scatter_fun = function(x, y) {
# Fit the nonlinear model using nls
model <- nls(Loss ~ k*exp(-1/2*(G_tmean[[x]]-mu)^2/sigma^2),
data = G_tmean,
start = c(mu = 18, sigma = 5, k = 10))
# Extract coefficients
coefficients <- coef(model)
# Plot
ggplot(data = test, aes(x = .data[[x]], y = .data[[y]])) +
geom_point() +
geom_smooth(method = 'nls',
color = 'blue',
formula = y ~ k * exp(-1/2 * (x - mu)^2 / sigma^2),
method.args = list(start = c(mu = 16, sigma = 5, k = 10)),
se = FALSE) +
theme_bw() +
theme(panel.border = element_blank()) +
annotate("text",
x = Inf, y = Inf, vjust = 2, hjust = 1,
label = paste("Amplitude =", round(coefficients[3], 2),
", Standard deviation =", round(coefficients[2], 2),
", Mean =", round(coefficients[1], 2)))
}
# Output plots
explore.plots = map(response, ~scatter_fun(.x, quant))
explore.plots
CUSUM_ppt <- DF %>%
filter(Loss > 0) %>%
rowwise() %>%
mutate(ppt_CUSUM = sum(across(c(ppt_July, ppt_Aug))))
ggplot(data = CUSUM_ppt, aes(x=ppt_CUSUM, y=Loss)) +
geom_point() +
stat_density(aes(x=ppt_CUSUM, y=2500*(..density..)), position="identity", geom="line", color = 'red', linewidth = 1.5)

rm(DF_2020, DF_2021, explore.plots, results_df, G_ppt, G_tmax, G_tmean, G_tmin, CUSUM_ppt)
## Warning in rm(DF_2020, DF_2021, explore.plots, results_df, G_ppt, G_tmax, :
## object 'results_df' not found
## Warning in rm(DF_2020, DF_2021, explore.plots, results_df, G_ppt, G_tmax, :
## object 'G_tmean' not found
## Warning in rm(DF_2020, DF_2021, explore.plots, results_df, G_ppt, G_tmax, :
## object 'G_tmin' not found
USDA NASS DATA
USDA_21 <- read.csv('/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2021.csv')
USDA_18 <- read.csv('/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2018.csv')
USDA_16 <- read.csv('/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2016.csv')
USDA_14 <- read.csv('/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2014.csv')
#USDA_10 <- read.csv('/Users/jilliancheck/Library/CloudStorage/OneDrive-MichiganStateUniversity/Economics survey/USDA NASS/2010.csv')
# not including 2010 data because data collection is only for multistate!
USDA <- rbind (USDA_21, USDA_18, USDA_16, USDA_14)
rm(USDA_21, USDA_18, USDA_16, USDA_14)
USDA <- USDA %>%
relocate(Year, .before = 'Domain') %>% # Move 'Year' column to before 'Domain'
subset(Geo.Level != 'REGION : MULTI-STATE') %>% # 'Remove any rows containing 'REGION: MULTI-STATE'
relocate(State, .after = 'Year') %>% # Relocate 'State' to after 'Year'
select(15:28) %>% # Only retain columns 15-28
mutate(Name = str_extract(Domain.Category, "(?<=\\().*?(?=\\s*=)")) %>% # Extract the chemical names from the 'Domain.Category' and create a new column named 'Name'
relocate(Name, .after = Domain.Category) %>% # Relocate Name columns after 'Domain.Category'
subset(str_detect(Domain.Category, pattern = 'FUNGI')) %>% # Only retain rows where Domain.Category contains 'FUNGI'
select(!c('Domain', 'Domain.Category')) %>% # Keep all columns besides Domain and Domain.Category
subset(!is.na(Name)) %>% # Remove rows where Name is NA
select(!contains('CV')) # Remove columns with CV in name
# Change column names
colnames(USDA) <- c('Year', 'State', 'Name',
'Total_Fungicide_Apps_lb',
'Average_Fungicide_Apps_lb.a',
'Average_Fungicide_Apps_lb.a.y',
'Average_Number_Fungicide_Apps',
'Average_Prop_Area_Treated_.')
# Remove weird columns
USDA <- USDA %>%
subset(!Total_Fungicide_Apps_lb %in% c(' (NA)', ' (D)', ' (Z)' , '')) %>%
subset(!Average_Fungicide_Apps_lb.a %in% c(' (D)',' (NA)', ' (Z)', '')) %>%
subset(!Average_Fungicide_Apps_lb.a.y %in% c(' (D)',' (NA)', ' (Z)', '')) %>%
subset(!Average_Number_Fungicide_Apps %in% c(' (D)',' (NA)', ' (Z)', '')) %>%
subset(!Average_Prop_Area_Treated_. %in% c(' (D)',' (NA)', ' (Z)', ''))
USDA <- USDA %>%
mutate(FRAC = ifelse(Name %in% c('AZOXYSTROBIN', 'PYRACLOSTROBIN', 'TRIFLOXYSTROBIN'), '11',
ifelse(Name == 'BENZOVINDIFLUPYR', '7', '3'))) %>%
relocate(FRAC, .after = Name)
USDA <- USDA
USDA$Total_Fungicide_Apps_lb <- as.numeric(gsub(",", "", USDA$Total_Fungicide_Apps_lb))
USDA[5:9] <- sapply(USDA[5:9], as.numeric)
response = names(USDA[c(5:9)])
response = set_names(response)
State x FRAC
P <- USDA %>%
#subset(State %in% c('ILLINOIS', 'INDIANA', 'IOWA')) %>%
group_by(Year, FRAC, State) %>%
summarise(Total_Fungicide_Apps_lb = sum(Total_Fungicide_Apps_lb),
Average_Fungicide_Apps_lb.a = mean(Average_Fungicide_Apps_lb.a),
Average_Fungicide_Apps_lb.a.y = mean(Average_Fungicide_Apps_lb.a.y),
Average_Number_Fungicide_Apps = mean(Average_Number_Fungicide_Apps),
Average_Prop_Area_Treated_. = sum(Average_Prop_Area_Treated_.))
## `summarise()` has grouped output by 'Year', 'FRAC'. You can override using the
## `.groups` argument.
scatter_fun = function(x, y) {
ggplot(P, aes(x = .data[[x]], y = .data[[y]])) +
geom_col(aes(fill = FRAC), color = 'black') +
scale_x_continuous(breaks = seq(1991, 2023, 2)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
facet_grid(~State)}
explore.plots = map(response, ~scatter_fun('Year', .x))
explore.plots
## $Total_Fungicide_Apps_lb

##
## $Average_Fungicide_Apps_lb.a

##
## $Average_Fungicide_Apps_lb.a.y

##
## $Average_Number_Fungicide_Apps

##
## $Average_Prop_Area_Treated_.

State X Fungicide
P1 <- USDA %>%
subset(State %in% c('ILLINOIS', 'INDIANA', 'IOWA')) %>%
#subset(State %in% c('ILLINOIS', 'INDIANA', 'IOWA', 'MICHIGAN', 'OHIO')) %>%
group_by(Year, Name, State) %>%
summarise(Total_Fungicide_Apps_lb = sum(Total_Fungicide_Apps_lb),
Average_Fungicide_Apps_lb.a = mean(Average_Fungicide_Apps_lb.a),
Average_Fungicide_Apps_lb.a.y = mean(Average_Fungicide_Apps_lb.a.y),
Average_Number_Fungicide_Apps = mean(Average_Number_Fungicide_Apps),
Average_Prop_Area_Treated_. = sum(Average_Prop_Area_Treated_.))
## `summarise()` has grouped output by 'Year', 'Name'. You can override using the
## `.groups` argument.
scatter_fun = function(x, y) {
ggplot(P1, aes(x = .data[[x]], y = .data[[y]])) +
geom_col(aes(fill = Name), color = 'black') +
scale_x_continuous(breaks = seq(1991, 2023, 2)) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
facet_grid(~State)}
explore.plots = map(response, ~scatter_fun('Year', .x))
explore.plots
## $Total_Fungicide_Apps_lb

##
## $Average_Fungicide_Apps_lb.a

##
## $Average_Fungicide_Apps_lb.a.y

##
## $Average_Number_Fungicide_Apps

##
## $Average_Prop_Area_Treated_.
